import subprocess
import os
import shutil
import tqdm

# Audio sample rate (presumably in Hz -- TODO confirm). Nothing in the visible
# part of this file reads this constant.
SAMPLE_RATE = 44100


def fetch_clip(video_id, start_time, label_list, train_or_test, num_=None, *, clip_seconds=10):
    """Download one YouTube video and store a fixed-length excerpt of it.

    The whole video is downloaded with ``yt-dlp`` into the current working
    directory (at most 480 pixels high, merged into an ``.mkv``), trimmed with
    ``ffmpeg`` to ``start_time .. start_time + clip_seconds`` and the excerpt
    is moved to ``{train_or_test}/{first label}/``.  The full download is
    removed once the excerpt has been stored.

    Nothing is downloaded when the excerpt already exists under any of the
    label directories, or when ``{outname}.mp4`` already exists in the
    current working directory.

    Args:
        video_id: YouTube video id, interpolated into the watch URL.
        start_time: Start of the excerpt in seconds; anything ``int()``
            accepts.
        label_list: One or more labels joined with ``+``.  Every label
            directory is checked for an existing excerpt, but a new excerpt
            is stored under the first label only.
        train_or_test: Top-level output directory (presumably the dataset
            split name -- confirm against the CSV).
        num_: Unused.  Optional so that rows without this trailing column can
            still be unpacked positionally into the call.
        clip_seconds: Length of the excerpt in seconds (keyword-only).

    Returns:
        None.

    Raises:
        ValueError: If ``start_time`` cannot be converted to ``int``.
        FileNotFoundError: If the ``yt-dlp`` or ``ffmpeg`` executable is not
            on ``PATH``.
    """
    start_time = int(start_time)
    end_time = start_time + clip_seconds
    outname = f"v{video_id}_{start_time}_{end_time}"
    download_path = f"./{outname}.mkv"
    clip_name = f"{outname}_out.mkv"

    labels = label_list.split('+')

    # Skip clips that an earlier run already stored under any of the labels.
    if any(os.path.isfile(f"{train_or_test}/{label}/{clip_name}") for label in labels):
        return

    if os.path.isfile(f"{outname}.mp4"):
        print("Already have it.")
        return

    subprocess.run(
        ["yt-dlp", "-f", "bestvideo[height<=480]+bestaudio/best[height<=480]",
         f"https://youtube.com/watch?v={video_id}", "--output", f"{outname}.%(ext)s",
         "--merge-output-format", "mkv"]
    )

    if not os.path.isfile(download_path):
        # Nothing usable was downloaded.  Give the user a chance to Ctrl+C.
        import time
        time.sleep(1)
        return

    # -nostdin: several workers share one terminal, so ffmpeg must not read it.
    # -y: overwrite a leftover partial clip instead of blocking on a prompt.
    trim = subprocess.run(
        ["ffmpeg", "-nostdin", "-y", "-loglevel", "quiet", "-i", download_path,
         "-ss", str(start_time), "-to", str(end_time), f"./{clip_name}"]
    )

    if trim.returncode != 0 or not os.path.isfile(clip_name):
        # Report and bail out instead of letting shutil.move raise and take
        # down the calling worker; the full download is kept for a retry.
        print(f"ffmpeg failed for {outname}; keeping {download_path}")
        return

    # Only the first label determines where the clip is stored.
    dirr = f"{train_or_test}/{labels[0]}"
    if os.path.isdir(dirr):
        print("dir exist")
    else:
        # exist_ok: this function is run from many threads, and another one
        # may create the directory between the check above and this call.
        os.makedirs(dirr, exist_ok=True)
        print(dirr)

    shutil.move(clip_name, dirr)
    os.remove(download_path)


import os
import tqdm
import threading


def process_file(filename):
    """Fetch the clip described by every data row of a CSV chunk file.

    Lines starting with ``#`` are treated as comments and blank lines are
    skipped.  In every other line, double quotes are replaced by ``+`` and
    spaces by ``_`` before the line is split on commas; the resulting fields
    are passed positionally to ``fetch_clip``.

    Args:
        filename: Path of the CSV file to read.
    """
    with open(filename, "r") as file:
        for line in tqdm.tqdm(file):
            if line.startswith("#"):
                continue
            line = line.strip().replace('"', '+').replace(' ', '_')
            if not line:
                # A blank line would unpack to one empty argument and make
                # fetch_clip raise TypeError, ending this worker early.
                continue
            # Split the row and pass its fields as fetch_clip's arguments.
            fetch_clip(*line.split(','))


original_filename = "vggsound.csv"
# Number of chunk files; one worker thread is started per chunk.
NUM_CHUNKS = 56

# Count the rows once; the context manager closes the file handle afterwards.
with open(original_filename, "r") as infile:
    total_lines = sum(1 for _ in infile)

# Floor division alone would drop the last ``total_lines % NUM_CHUNKS`` rows,
# so the remainder is spread one row each over the first chunks.
lines_per_chunk, extra_lines = divmod(total_lines, NUM_CHUNKS)

os.makedirs("split_files", exist_ok=True)

# Single pass over the source: each chunk takes the next consecutive rows.
with open(original_filename, "r") as infile:
    for i in tqdm.tqdm(range(NUM_CHUNKS)):
        chunk_filename = f"split_files/chunk_{i}.csv"
        chunk_size = lines_per_chunk + (1 if i < extra_lines else 0)
        with open(chunk_filename, "w") as outfile:
            for _ in range(chunk_size):
                outfile.write(infile.readline())

# Start one downloader thread per chunk, then wait for all of them.
threads = []
for i in range(NUM_CHUNKS):
    chunk_filename = f"split_files/chunk_{i}.csv"
    thread = threading.Thread(target=process_file, args=(chunk_filename,))
    threads.append(thread)
    thread.start()

for thread in threads:
    thread.join()
